# Start from an empty workspace: remove every object in the current
# environment. Attached packages and options() are not affected by this call.
rm(list = ls())


library(data.table)
library(stringr)
library(urltools)
library(lubridate)
library(dplyr)
library(ggplot2)
library(ggpubr)
library(lfe)
library(stargazer)
library(fixest)
library(scales)
library(ggpattern)

# Colour palettes for the plots.
# The nine-colour tail is written once; palettes 1 and 2 are that tail with
# extra leading colours, so a category keeps its colour when the leading
# categories are dropped from a plot.
safe_colorblind_palette3 <- c(
  "#332288", "#AA4499", "#44AA99", "#999933", "#882255",
  "#661100", "#6699CC", "#888888", "#D55E00"
)
safe_colorblind_palette2 <- c("#DDCC77", safe_colorblind_palette3)
safe_colorblind_palette1 <- c(
  "#88CCEE", "#CC6677", "#DDCC77", "#117733",
  safe_colorblind_palette3
)

#scales::show_col(safe_colorblind_palette)
# Five-step green and purple ramps, ordered from dark to light.
green_palette <- c(
  "#117733", "#18aa49", "#20de5f", "#52e684", "#86eea8"
)
purple_palette <- c(
  "#332288", "#452eb7", "#634dd3", "#8d7cde", "#b6abea"
)

# Every relative path used for reading CSVs and saving PDFs is resolved
# against this directory. The string looks like a placeholder that has to be
# replaced with the real workspace folder before running.
# File names noted for the workspace:
#"10052024_incentive_data.csv"  "10052024_ml_experiment.csv"   "10052024_quiz_regression.csv"
setwd(dir = "path to workspace")

##############################################
##### Monetary Incentive Data ################
##############################################
# Load the incentive experiment and preview the first rows.
dt <- fread("10052024_incentive_data.csv")
head(dt)

# Recode the numeric `group` value into a readable label. Values other than
# 0, 2, 10 and 20 match no branch and therefore become NA.
dt[, condition_label := case_when(
  group == 0 ~ "No Bonus",
  group == 2 ~ "2-cent",
  group == 10 ~ "10-cent",
  group == 20 ~ "20-cent"
)]
# Sanity checks: recoding cross-tab and number of rows per condition.
table(dt$group, dt$condition_label)
table(dt$condition_label)

# Explicit level order so coefficients and bars run from no bonus to 20 cents.
dt[, condition_factor := factor(
  condition_label,
  levels = c("No Bonus", "2-cent", "10-cent", "20-cent")
)]

# Performance on treatment dummies
# Without an intercept (`- 1`) there is one coefficient per condition;
# standard errors are clustered on user_id.
dt_lm <- feols(
  performance ~ condition_factor - 1,
  data = dt,
  cluster = "user_id"
)
etable(dt_lm)

# One row per coefficient: label, estimate and the confint() bounds.
perf_ci <- confint(dt_lm)
dt_reg <- data.table(
  condition = names(dt_lm$coefficients),
  coeff = unname(dt_lm$coefficients),
  ci_lb = perf_ci[[1]],
  ci_ub = perf_ci[[2]]
)
# Strip the variable-name prefix from the coefficient names and restore the
# intended condition order for the x-axis.
dt_reg[, condition := factor(
  gsub("condition_factor", "", condition),
  levels = c("No Bonus", "2-cent", "10-cent", "20-cent")
)]

# Bars with confidence-interval whiskers and a percentage label per bar.
# coord_cartesian() zooms the y-axis to 75%-96% without dropping any data.
gp <- ggplot(dt_reg, aes(x = condition, y = coeff, fill = condition)) +
  geom_bar(stat = "identity", alpha = 0.9) +
  geom_errorbar(
    aes(ymin = ci_lb, ymax = ci_ub),
    width = 0.3, colour = "black", alpha = 0.9, linewidth = 0.7
  ) +
  geom_text(
    aes(label = paste0(round(coeff * 100, 1), "%")),
    hjust = -0.1, vjust = -0.05, size = 6
  ) +
  theme_classic(base_size = 18) +
  theme(legend.position = "none", axis.title.x = element_blank()) +
  ylab("Performance (% of Maximum Value)") +
  scale_fill_manual(values = safe_colorblind_palette1) +
  scale_y_continuous(labels = percent_format()) +
  coord_cartesian(ylim = c(0.75, 0.96))
gp
ggsave(filename = "incentives_barplot.pdf", plot = gp, width = 6.1, height = 8)

# Time on treatment dummies
# Same specification as the performance model, with time_spent / 60 as the
# outcome. The axis label says minutes, so time_spent is presumably recorded
# in seconds -- TODO confirm against the data dictionary.
dt_lm <- feols(
  time_spent / 60 ~ condition_factor - 1,
  data = dt,
  cluster = "user_id"
)
etable(dt_lm)

# One row per coefficient: label, estimate and the confint() bounds.
time_ci <- confint(dt_lm)
dt_reg <- data.table(
  condition = names(dt_lm$coefficients),
  coeff = unname(dt_lm$coefficients),
  ci_lb = time_ci[[1]],
  ci_ub = time_ci[[2]]
)
# Drop the variable-name prefix and fix the condition order for the x-axis.
dt_reg[, condition := factor(
  gsub("condition_factor", "", condition),
  levels = c("No Bonus", "2-cent", "10-cent", "20-cent")
)]

# Bars with confidence-interval whiskers; labels show the estimate rounded to
# two decimals. The y-axis is left on its default scale.
gp <- ggplot(dt_reg, aes(x = condition, y = coeff, fill = condition)) +
  geom_bar(stat = "identity", alpha = 0.9) +
  geom_errorbar(
    aes(ymin = ci_lb, ymax = ci_ub),
    width = 0.3, colour = "black", alpha = 0.9, linewidth = 0.7
  ) +
  geom_text(
    aes(label = round(coeff, 2)),
    hjust = -0.1, vjust = -0.05, size = 6
  ) +
  theme_classic(base_size = 18) +
  theme(legend.position = "none", axis.title.x = element_blank()) +
  ylab("Time Spent (in Minutes)") +
  scale_fill_manual(values = safe_colorblind_palette1)
# Disabled: + scale_y_continuous(labels = percent_format()) + coord_cartesian(ylim = c(0.75, .95))
gp
ggsave(filename = "incentives_timebarplot.pdf", plot = gp, width = 6.1, height = 8)

##############################################
##### Model Recommendations Incentive Data ################
##############################################
# Load the ML-recommendation experiment. Every other read of this file in the
# script uses the workspace root, so that location is tried first; the
# "data/" sub-folder is kept as a fallback for the older layout.
ml_experiment_path <- "10052024_ml_experiment.csv"
if (!file.exists(ml_experiment_path)) {
  ml_experiment_path <- file.path("data", ml_experiment_path)
}
dt <- fread(ml_experiment_path)
head(dt)
table(dt$treatment)

# Label each row by the first pattern its `treatment` string matches
# (%like% is a regular-expression match); rows matching none become NA.
dt[, condition_label := case_when(
  treatment %like% "no_ml" ~ "No ML",
  treatment %like% "q1" ~ "q1 (72%)",
  treatment %like% "q2" ~ "q2 (80%)",
  treatment %like% "q3" ~ "q3 (84%)",
  treatment %like% "q4" ~ "q4 (88%)",
  treatment %like% "q5" ~ "q5 (90%)",
  treatment %like% "q6" ~ "q6 (92%)"
)]
# Sanity checks: recoding cross-tab and number of rows per condition.
table(dt$treatment, dt$condition_label)
table(dt$condition_label)
# Default (alphabetical) level order: "No ML" first, then q1 ... q6.
dt[, condition_factor := factor(condition_label)]

# Performance on treatment dummies
# Without an intercept (`- 1`) there is one coefficient per condition;
# standard errors are clustered on user_id.
dt_lm <- feols(
  performance ~ condition_factor - 1,
  data = dt,
  cluster = "user_id"
)
etable(dt_lm)

# One row per coefficient: label, estimate and the confint() bounds.
perf_ci <- confint(dt_lm)
dt_reg <- data.table(
  condition = names(dt_lm$coefficients),
  coeff = unname(dt_lm$coefficients),
  ci_lb = perf_ci[[1]],
  ci_ub = perf_ci[[2]]
)
dt_reg
# Strip the variable-name prefix so the x-axis shows the plain labels.
dt_reg[, condition := gsub("condition_factor", "", condition)]

# Bars with confidence-interval whiskers and a percentage label per bar.
# coord_cartesian() zooms the y-axis to 75%-96% without dropping any data.
gp <- ggplot(dt_reg, aes(x = condition, y = coeff, fill = condition)) +
  geom_bar(stat = "identity", alpha = 0.9) +
  geom_errorbar(
    aes(ymin = ci_lb, ymax = ci_ub),
    width = 0.3, colour = "black", alpha = 0.9, linewidth = 0.7
  ) +
  geom_text(
    aes(label = paste0(round(coeff * 100, 1), "%")),
    hjust = -0.1, vjust = -0.05, size = 6
  ) +
  theme_classic(base_size = 18) +
  theme(legend.position = "none", axis.title.x = element_blank()) +
  ylab("Performance (% of Maximum Value)") +
  scale_fill_manual(values = safe_colorblind_palette2) +
  scale_y_continuous(labels = percent_format()) +
  coord_cartesian(ylim = c(0.75, 0.96))
gp
# Saving fails when the target folder is missing, so make sure it exists.
dir.create("output", showWarnings = FALSE)
ggsave(filename = "output/ml_barplot.pdf", plot = gp, width = 10, height = 8)

# Time on treatment dummies
# Same specification as the performance model, with time_spent / 60 as the
# outcome. The axis label says minutes, so time_spent is presumably recorded
# in seconds -- TODO confirm against the data dictionary.
dt_lm <- feols(
  time_spent / 60 ~ condition_factor - 1,
  data = dt,
  cluster = "user_id"
)
etable(dt_lm)

# One row per coefficient: label, estimate and the confint() bounds.
time_ci <- confint(dt_lm)
dt_reg <- data.table(
  condition = names(dt_lm$coefficients),
  coeff = unname(dt_lm$coefficients),
  ci_lb = time_ci[[1]],
  ci_ub = time_ci[[2]]
)
dt_reg
# Strip the variable-name prefix so the x-axis shows the plain labels.
dt_reg[, condition := gsub("condition_factor", "", condition)]

# Bars with confidence-interval whiskers; labels show the estimate rounded to
# two decimals. The y-axis is left on its default scale.
gp <- ggplot(dt_reg, aes(x = condition, y = coeff, fill = condition)) +
  geom_bar(stat = "identity", alpha = 0.9) +
  geom_errorbar(
    aes(ymin = ci_lb, ymax = ci_ub),
    width = 0.3, colour = "black", alpha = 0.9, linewidth = 0.7
  ) +
  geom_text(
    aes(label = round(coeff, 2)),
    hjust = -0.1, vjust = -0.05, size = 6
  ) +
  theme_classic(base_size = 18) +
  theme(legend.position = "none", axis.title.x = element_blank()) +
  ylab("Time Spent (in Minutes)") +
  scale_fill_manual(values = safe_colorblind_palette2)
# Disabled: + scale_y_continuous(labels = percent_format()) + coord_cartesian(ylim = c(0.75, .95))
gp
ggsave(filename = "ml_timebarplot.pdf", plot = gp, width = 10, height = 8)

# Best performance on treatment dummies
# Real and adjusted performance are compared for the ML conditions only, so
# the "No ML" rows are dropped before fitting.
dt_ml_arms <- dt[condition_factor != "No ML"]

# Real performance: one coefficient per condition, clustered on user_id.
dt_lm <- feols(
  performance ~ condition_factor - 1,
  data = dt_ml_arms,
  cluster = "user_id"
)
etable(dt_lm)
real_ci <- confint(dt_lm)
dt_reg <- data.table(
  condition = names(dt_lm$coefficients),
  coeff = unname(dt_lm$coefficients),
  ci_lb = real_ci[[1]],
  ci_ub = real_ci[[2]]
)
dt_reg
dt_reg[, condition := gsub("condition_factor", "", condition)]

# Adjusted performance: same specification with adjusted_performance as the
# outcome. dt_lm is reused, so after this point it holds the adjusted model.
dt_lm <- feols(
  adjusted_performance ~ condition_factor - 1,
  data = dt_ml_arms,
  cluster = "user_id"
)
etable(dt_lm)
adjusted_ci <- confint(dt_lm)
dt_reg1 <- data.table(
  condition = names(dt_lm$coefficients),
  coeff = unname(dt_lm$coefficients),
  ci_lb = adjusted_ci[[1]],
  ci_ub = adjusted_ci[[2]]
)
dt_reg1[, condition := gsub("condition_factor", "", condition)]

# Stack both sets of estimates; the factor order puts "real" before
# "adjusted" within each dodged pair of bars.
dt_reg[, source := "real"]
dt_reg1[, source := "adjusted"]
dt_reg <- rbind(dt_reg, dt_reg1)
dt_reg[, source := factor(source, levels = c("real", "adjusted"))]

# Dodged bars per condition: plain fill for real, striped for adjusted.
# `pattern` is supplied as a plain vector with one entry per row of dt_reg.
# safe_colorblind_palette3 is safe_colorblind_palette2 without its first
# ("No ML") colour, so q1 ... q6 get the same colours as in the other ML plots.
gp <- ggplot(
  dt_reg,
  aes(x = condition, y = coeff, fill = condition,
      group = interaction(condition, source))
) +
  geom_bar_pattern(
    pattern = ifelse(dt_reg$source == "real", "none", "stripe"),
    pattern_angle = 45,      # Angle of stripes
    pattern_spacing = 0.02,  # Spacing for patterns
    pattern_density = 0.4,
    stat = "identity",
    position = position_dodge(width = 0.9)
  ) +
  geom_errorbar(
    aes(ymin = ci_lb, ymax = ci_ub),
    width = 0.3, colour = "black", alpha = 0.9, linewidth = 0.7,
    position = position_dodge(width = 0.9)
  ) +
  geom_text(
    aes(label = paste0(round(coeff * 100, 1), "%")),
    hjust = 0.5, vjust = -1.5, size = 6,
    position = position_dodge(width = 0.9)
  ) +
  theme_classic(base_size = 18) +
  theme(legend.position = "none", axis.title.x = element_blank()) +
  ylab("Performance (% of Maximum Value)") +
  scale_fill_manual(values = safe_colorblind_palette3) +
  scale_y_continuous(labels = percent_format()) +
  coord_cartesian(ylim = c(0.75, 0.96))
gp
# Saving fails when the target folder is missing, so make sure it exists.
dir.create("output", showWarnings = FALSE)
ggsave(filename = "output/ml2_barplot.pdf", plot = gp, width = 10, height = 8)


#### Combined barplot

# Turn a fitted condition-dummy model into a table with one row per
# coefficient: plain condition label (the "condition_factor" prefix is
# removed), point estimate, and the lower/upper bounds from confint().
tidy_condition_coefs <- function(model) {
  ci <- confint(model)
  data.table(
    condition = gsub("condition_factor", "", names(coef(model))),
    coeff = unname(coef(model)),
    ci_lb = ci[[1]],
    ci_ub = ci[[2]]
  )
}

# Calculate coefficients for all conditions but do not exclude "No ML" for the combined plot
dt_lm_all <- feols(
  performance ~ condition_factor - 1,
  data = dt,
  cluster = "user_id"
)
dt_reg_all <- tidy_condition_coefs(dt_lm_all)
dt_reg_all

# Calculate coefficients for real and adjusted performance, excluding "No ML"
dt_without_no_ml <- dt[condition_factor != "No ML"]
dt_lm_real <- feols(
  performance ~ condition_factor - 1,
  data = dt_without_no_ml,
  cluster = "user_id"
)
dt_reg_real <- tidy_condition_coefs(dt_lm_real)

dt_lm_adjusted <- feols(
  adjusted_performance ~ condition_factor - 1,
  data = dt_without_no_ml,
  cluster = "user_id"
)
dt_reg_adjusted <- tidy_condition_coefs(dt_lm_adjusted)

# Mark sources
dt_reg_real[, source := "real"]
dt_reg_adjusted[, source := "adjusted"]
dt_reg_combined <- rbind(dt_reg_real, dt_reg_adjusted)

# Include "No ML" condition from dt_reg_all. That row has no `source` column
# yet, so fill = TRUE pads it with NA; it is then tagged as "real" before the
# column is turned into a factor ("real" drawn before "adjusted").
dt_reg_combined <- rbind(
  dt_reg_all[condition == "No ML"],
  dt_reg_combined,
  fill = TRUE
)
dt_reg_combined[condition == "No ML", source := "real"]
dt_reg_combined[, source := factor(source, levels = c("real", "adjusted"))]

# Generate the plot
# One plain bar for "No ML", and a plain (real) plus striped (adjusted) pair
# for every ML condition. `pattern` is supplied as a plain vector with one
# entry per row of dt_reg_combined.
gp_combined <- ggplot(
  dt_reg_combined,
  aes(x = condition, y = coeff, fill = condition,
      group = interaction(condition, source))
) +
  geom_bar_pattern(
    pattern = ifelse(dt_reg_combined$source == "real", "none", "stripe"),
    pattern_angle = 45, pattern_spacing = 0.02, pattern_density = 0.4,
    stat = "identity",
    position = position_dodge(width = 0.9)
  ) +
  geom_errorbar(
    aes(ymin = ci_lb, ymax = ci_ub),
    width = 0.3, colour = "black", alpha = 0.9, linewidth = 0.7,
    position = position_dodge(width = 0.9)
  ) +
  geom_text(
    aes(label = paste0(round(coeff * 100, 1), "%")),
    hjust = 0.5, vjust = -1.5, size = 6,
    position = position_dodge(width = 0.9)
  ) +
  theme_classic(base_size = 18) +
  theme(legend.position = "none", axis.title.x = element_blank()) +
  ylab("Performance (% of Maximum Value)") +
  scale_fill_manual(values = safe_colorblind_palette2) +
  scale_y_continuous(labels = percent_format()) +
  coord_cartesian(ylim = c(0.75, 0.96))
gp_combined
# Save the combined plot
ggsave(filename = "ml_combined_barplot.pdf", plot = gp_combined, width = 10, height = 8)

##############################################
##### Testing Effect of Quiz ################
##############################################
# The ML experiment file is read once here (it used to be loaded twice in a
# row, with the first copy only previewed and then overwritten).
dt_ml <- fread("10052024_ml_experiment.csv")
head(dt_ml)
table(dt_ml$treatment)

dt_inc <- fread("10052024_incentive_data.csv")
head(dt_inc)

# Same recoding as in the incentive section: numeric `group` -> label,
# unmatched values become NA.
dt_inc[, condition_label := case_when(
  group == 0 ~ "No Bonus",
  group == 2 ~ "2-cent",
  group == 10 ~ "10-cent",
  group == 20 ~ "20-cent"
)]

# Same recoding as in the ML section: first matching pattern wins,
# unmatched treatments become NA.
dt_ml[, condition_label := case_when(
  treatment %like% "no_ml" ~ "No ML",
  treatment %like% "q1" ~ "q1 (72%)",
  treatment %like% "q2" ~ "q2 (80%)",
  treatment %like% "q3" ~ "q3 (84%)",
  treatment %like% "q4" ~ "q4 (88%)",
  treatment %like% "q5" ~ "q5 (90%)",
  treatment %like% "q6" ~ "q6 (92%)"
)]

# Pool the "10-cent" rows of the incentive experiment with the "No ML" rows
# of the ML experiment. fill = TRUE pads columns that exist in only one of
# the two files with NA.
dt <- rbind(
  dt_inc[condition_label == "10-cent"],
  dt_ml[condition_label == "No ML"],
  fill = TRUE
)

# Difference in performance between the two pooled groups, clustered on
# user_id.
# NOTE(review): clustering assumes user_id values do not collide across the
# two experiments -- confirm.
reg <- feols(performance ~ condition_label, data = dt, cluster = "user_id")

etable(reg)


##############################################
##### Estimating Deltas ################
##############################################
dt <- fread("10052024_ml_experiment.csv")
head(dt)
table(dt$treatment)

# Label each row by the first pattern its `treatment` string matches;
# rows matching none become NA.
dt[, condition_label := case_when(
  treatment %like% "no_ml" ~ "No ML",
  treatment %like% "q1" ~ "q1 (72%)",
  treatment %like% "q2" ~ "q2 (80%)",
  treatment %like% "q3" ~ "q3 (84%)",
  treatment %like% "q4" ~ "q4 (88%)",
  treatment %like% "q5" ~ "q5 (90%)",
  treatment %like% "q6" ~ "q6 (92%)"
)]
table(dt$treatment, dt$condition_label)
table(dt$condition_label)
dt[, condition_factor := factor(condition_label)]

# Fit the human-vs-AI comparison for one ML condition.
#
# The rows whose condition_label matches `label_pattern` are stacked twice:
# once with the participant's own `is_opt` (human = 1) and once with
# `ai_opt_rate` stored under the same column name (human = 0). Regressing
# is_opt on `human` therefore estimates the human-minus-AI difference.
# Every user_id appears in both halves, so standard errors are clustered on
# user_id.
#
# data:          data.table with condition_label, is_opt, ai_opt_rate, user_id
# label_pattern: pattern passed to %like% to select the condition
# Returns the fitted fixest model.
fit_human_vs_ai <- function(data, label_pattern) {
  arm <- data[condition_label %like% label_pattern]
  stacked <- rbind(
    arm[, .(is_opt, human = 1, user_id)],
    arm[, .(is_opt = ai_opt_rate, human = 0, user_id)]
  )
  feols(is_opt ~ human, data = stacked, cluster = "user_id")
}

reg1 <- fit_human_vs_ai(dt, "q1")
reg2 <- fit_human_vs_ai(dt, "q2")
reg3 <- fit_human_vs_ai(dt, "q3")
reg4 <- fit_human_vs_ai(dt, "q4")
reg5 <- fit_human_vs_ai(dt, "q5")
reg6 <- fit_human_vs_ai(dt, "q6")

etable(reg1, reg2, reg3, reg4, reg5, reg6)

# p-values of every coefficient, one vector per model.
p_values <- lapply(list(reg1, reg2, reg3, reg4, reg5, reg6), function(model) {
  summary(model)$coeftable[, "Pr(>|t|)"]
})

# Bind the per-model vectors column-wise; the result is a numeric matrix with
# one row per coefficient and one column per model.
p_values_df <- do.call(cbind, p_values)
colnames(p_values_df) <- paste0("reg", 1:6)

# Display the p-values
print(p_values_df)

